/*
 Copyright 2013--Present JMM_PROGNAME
 
 This file is distributed under the terms of the JMM_PROGNAME License.
 
 You should have received a copy of the JMM_PROGNAME License.
 If not, see <JMM_PROGNAME WEBSITE>.
*/
// CREATED    : 9/13/2015
// LAST UPDATE: 9/30/2015

#include "statsxx/machine_learning/neural_network/deep_belief_network/DBN.hpp"

// STL
#include <cstddef>  // std::size_t
#include <iostream> // std::cout
#include <ostream>  // std::ostream
#include <vector>   // std::vector<>

// jScience
#include "jScience/linalg/Matrix.hpp" // Matrix<>
#include "jScience/stl/ostream.hpp"   // NullStream

/*
// jNeuralNet
#include "datasets.hpp" // DataSet, partition_data_set()
*/
 
// stats++
#include "statsxx/machine_learning/restricted_Boltzmann_machine/RBM.hpp"  // RBM


// greedy, layer-by-layer unsupervised training of the RBM
// Greedy, layer-by-layer unsupervised pretraining of the DBN's stack of RBMs.
// The first RBM is trained on the raw data; how subsequent layers receive
// their training input is selected by prop_type (only deterministic
// propagation is currently implemented).
//
// NOTE(review): `inline` on an out-of-line member definition in a .cpp file
// means other translation units cannot link against this function -- confirm
// whether this file is #included header-style; if not, drop `inline`.
inline void neural_network::DBN::pretrain(
                                          const int ntrials,                // number of trials to use for each RBM initialization
                                          // -----
                                          const PropagationType prop_type,  // type of propagation of signal to use in greedy training
                                          // -----
                                          // the parameters below are identical to RBM::stochastic_gradient():
                                          const int max_epochs,             // maximum number of epochs
                                          const int nbatches,               // number of batches per epoch
                                          // -----
                                          double lr,                        // learning rate
                                          double momentum,                  // momentum
                                          // -----
                                          const double weight_penalty,      // weight penalty
                                          // -----
                                          const double sparsity_target,     // sparsity target
                                          const double sparsity_penalty,    // sparsity penalty
                                          const double sparsity_multiplier, // multiplier for q (hidden activation) EMA
                                          const double q_min,               // minimum (desired) q
                                          const double q_max,               // maximum (desired) q
                                          const double q_penalty,           // q penalty
                                          // -----
                                          const int K_begin,                // number of starting Monte Carlo (MC) iterations
                                          const int K_end,                  // number of ending Monte Carlo iterations
                                          const double K_rate,              // rate of updates to number of MC iterations
                                          // -----
                                          const bool mean_field,            // mean-field approximation
                                          // -----
                                          const double convg_criterion,     // convergence criterion
                                          const int max_no_improvement,     // max number of epochs to go without an improvement in convergence
                                          // -----
                                          const Matrix<double> &X,          // data
                                          const bool x_binomial,            // whether the data is binomial
                                          // -----
                                          std::ostream* os                  // (optional) output stream (for e.g. training progress -- TODO confirm)
                                          )
{
    //=========================================================
    // TRAIN THE INITIAL RBM
    //=========================================================
    // note: irrespective of the greedy training algorithm, the first RBM is trained using the actual training data
    
    this->RBM[0].initialize_weights(ntrials, X, x_binomial);
    
    this->RBM[0].stochastic_gradient(max_epochs, nbatches, lr, momentum, weight_penalty, sparsity_target, sparsity_penalty, sparsity_multiplier, q_min, q_max, q_penalty, K_begin, K_end, K_rate, mean_field, convg_criterion, max_no_improvement, X, x_binomial, os);
    
    
    //=========================================================
    // GREEDY TRAIN SUBSEQUENT LAYERS
    //=========================================================
    
    //---------------------------------------------------------
    // STOCHASTIC SAMPLING
    //---------------------------------------------------------
    if(prop_type == neural_network::DBN::PropagationType::stochastic)
    {
        // ***
        // << jmm: here is not so simple, because I think that we basically need to handle the loop over epochs HERE ... because every SINGLE epoch in stochastic_gradient must have a transformed data set that has been passed through the RBM >>
        // << ... one way to handle this would be to implement a new subroutine in RBM that handles just one epoch ... then you could call this here, BUT would still have to handle convergence, updating learning rate in an adaptive scheme, etc. ... >>
        // << ... another potential way to do things would be to call stochastic_gradient() with only a single epoch ... HOWEVER, this will definitively incur a bunch of additional overhead and we would need to run checks for convergence, etc. ... >>
        // << ... the last option may be the best, at the expense of being technically more challenging: setup two subroutines in the DBN class, (i) one that does the single EPOCH that I was talking about before, and (ii) a separate subroutine that sits atop the EPOCH loop that one sets up ahead of time (with parameters, etc.) that one can repeatedly call that in turn calls the other subroutine for a single epoch >>
        std::cout << "tmp in pretrain(): stochastic propagation not implemented" << '\n';
        // ***
    }
    //---------------------------------------------------------
    // PSEUDO-STOCHASTIC SAMPLING
    //---------------------------------------------------------
    else if(prop_type == neural_network::DBN::PropagationType::pseudo_stochastic)
    {
        // ***
        // << jmm: there is the same issue as above ... though, similar to the TM DBN, one could pass an additional parameter to stochastic_gradient() to indicate sampling of the visible units (greedy_mean_field) on every pass ... but I don't know if there is a good basis for wanting to have this functionality built in ... >>
        // << ... the resolution to what to do here depends on what I decide to do for the stochastic propagation type above >>
        // fixed copy-paste bug: this branch previously reported "stochastic"
        std::cout << "tmp in pretrain(): pseudo-stochastic propagation not implemented" << '\n';
        // ***
    }
    //---------------------------------------------------------
    // DETERMINISTIC SAMPLING
    //---------------------------------------------------------
    else if(prop_type == neural_network::DBN::PropagationType::deterministic)
    {
        // std::size_t avoids the signed/unsigned comparison against std::vector::size()
        for(std::size_t i = 1; i < this->RBM.size(); ++i)
        {
            // form new data matrix by deterministically propagating input through all prior layers
            // << jmm: this is not efficient, as at each new layer we have to do a repeated deterministic propagation through prior layers >>
            Matrix<double> Xp(X.size(0), this->RBM[i].get_nvis());
            
            for(auto j = 0; j < X.size(0); ++j)
            {
                Vector<double> v = this->RBM_propagate(neural_network::DBN::PropagationType::deterministic, i, X.row(j));
                
                for(auto k = 0; k < v.size(); ++k)
                {
                    Xp(j,k) = v(k);
                }
            }
            
            // << jmm: I believe that with deterministic propagation, even with binomial input, all subsequent layers will NOT have binomial input >>
            // hence x_binomial is passed as false for every layer beyond the first
            
            this->RBM[i].initialize_weights(ntrials, Xp, false);
            
            this->RBM[i].stochastic_gradient(max_epochs, nbatches, lr, momentum, weight_penalty, sparsity_target, sparsity_penalty, sparsity_multiplier, q_min, q_max, q_penalty, K_begin, K_end, K_rate, mean_field, convg_criterion, max_no_improvement, Xp, false, os);
        }
    }
}
